
# R script for generating main ranking results from the prepared datasets
# This file replicates Tables 1-3 in the main text and A3-A5 in the appendix

rm(list = ls())

if (!require('tidyverse')) install.packages('tidyverse')
if (!require('haven')) install.packages('haven')
if (!require('readxl')) install.packages('readxl')
if (!require('knitr')) install.packages('knitr')
if (!require('kableExtra')) install.packages('kableExtra')

export_tables <- TRUE
if (export_tables & !dir.exists('Tables/')) dir.create('Tables/')

# Load data ---------------------------------------------------------------

# Make sure the prepared datasets are placed in the same (sub)dir as this script

dat_all = readRDS("Dataset_cleaned.RDS")

dat_pub = dat_all$`publication-level`
dat_falc = dat_all$`faculty-level`
dat_dept = dat_all$`department-level`

rm(dat_all)

# Define main helpers -----------------------------------------------------

rank_by_score = function(df, var, ids = c("region","country", "university_abbr")) { 
  rank = rank(-df[[var]], ties.method = "random")
  ranks = data.frame(df[,ids], rank)
  colnames(ranks) = c(ids, var)
  return(ranks)
}

choose_top20_schools = function(df_rank, var, top_n = 10, ids = c("region","country", "university_abbr")) { 
  df_rank_topK = df_rank[df_rank[[var]]<=top_n, c(ids, var)]
  names(df_rank_topK) <- c(ids, "rank")
  df_rank_topK = df_rank_topK[order(df_rank_topK[["rank"]]),]
  return(df_rank_topK)
}

annotate_uni_by_score = function(df_rank, df_score, var, var_out, ids = c("region","country", "university_abbr"), decimal = 0) { 
  df_score = df_score |> dplyr::filter(university_abbr %in% df_rank[["university_abbr"]]) 
  if (decimal==0) df_score[[var]] = as.integer(df_score[[var]]) 
  else df_score[[var]] = round(df_score[[var]], decimal) 
  df_rankscore = dplyr::left_join(df_rank, df_score, by = ids)
  df_rankscore$measure_formatted = trimws(format(df_rankscore[[var]], nsmall=decimal, big.mark=",")) 
  df_rankscore$university_annotated = paste0(trimws(df_rankscore[["university_abbr"]]), " (", df_rankscore[["measure_formatted"]], ")")
  df_rankscore2 = df_rankscore[,c("rank", "university_annotated")]
  names(df_rankscore2) <- c("Rank", var_out)
  return(df_rankscore2)
}

score_to_league = function(DF, Var, Var_out, Top_N = 10, IDs = c("region","country", "university_abbr"),  Decimal = 0) {
  rank_by_score(df = DF, var = Var, ids = IDs) |> 
    choose_top20_schools(var = Var, top_n = Top_N, ids = IDs) |>
    annotate_uni_by_score(df_score = DF, var = Var, var_out = Var_out, ids = IDs, decimal = Decimal)
}

combine_rank_lists = function(ls_rankscores) { 
  df_colcat = Reduce(function(x, y) merge(x, y, by = "Rank", all=TRUE), ls_rankscores)
  return(df_colcat)
}

export_latex_table = function(df_table, path = NULL) { 
  tex = knitr::kable(df_table, format = "latex", booktabs = TRUE, linesep = "")
  writeLines(tex, path)
}

league_to_latex = function(LS_rankscores, Path = NULL) {
  combine_rank_lists(ls_rankscores = LS_rankscores) |>
    export_latex_table(path = Path)
}

combine2stack_regional_ranks = function(ls_ranks, df_ID=IDs_region) { 
  Reduce(
    f = function(x,y) merge(x,y, all=TRUE, by=c("Region","Rank")), x = ls_ranks) |>
    (\(x) dplyr::left_join(df_ID,x,by=c("Region","Rank")))() |>
    dplyr::mutate(
      Citations = case_when(is.na(Citations)~"",TRUE~Citations),
      `Recent Citations` = case_when(is.na(`Recent Citations`)~"",TRUE~`Recent Citations`),
      Impact = case_when(is.na(Impact)~"",TRUE~Impact),
      `Recent Impact` = case_when(is.na(`Recent Impact`)~"",TRUE~`Recent Impact`),
      `Top Publications` = case_when(is.na(`Top Publications`)~"",TRUE~`Top Publications`),
      `Recent Top Publications` = case_when(is.na(`Recent Top Publications`)~"",TRUE~`Recent Top Publications`)
    ) |>
    dplyr::select(Region, Rank, 
                  Citations, Impact, `Top Publications`, 
                  `Recent Citations`, `Recent Impact`, `Recent Top Publications`
    ) |>
    dplyr::select(-Region)
}

make_regional_tables = function(tab_df, out_format="latex") { 
  kableExtra::kbl(
    tab_df, format = out_format, booktabs = TRUE, linesep = ""
  ) |>
    kableExtra::pack_rows(
      index = # c("Europe"=10,"North America"=10,"Asia"=10,"Latin America"=10,"Oceania"=10,"Africa"=2), 
        c("North America"=10,"Europe"=10,"Asia"=10,"Oceania"=10,"Latin America"=10,"Africa"=2), 
      bold=TRUE, italic=TRUE, hline_before = TRUE, hline_after = TRUE)
}

# Define supplemental helpers ---------------------------------------------

abbreviate_regions = function(region) {
  region_abbr = region
  region_abbr[region=="Europe"] <- "EU"
  region_abbr[region=="North America"] <- "NA"
  region_abbr[region=="Latin America"] <- "LA"
  region_abbr[region=="Asia"] <- "AS"
  region_abbr[region=="Africa"] <- "AF"
  region_abbr[region=="Oceania"] <- "OC"
  return(region_abbr)  
}

annotate_uni_by_score_all = function(df_rank, df_score, var, var_out, ids = c("region","country", "university_abbr"), decimal = 0) { 
  df_score = df_score |> dplyr::filter(university_abbr %in% df_rank[["university_abbr"]]) 
  if (decimal==0) df_score[[var]] = as.integer(df_score[[var]]) 
  else df_score[[var]] = round(df_score[[var]], decimal) 
  df_rankscore = dplyr::left_join(df_rank, df_score, by = ids)
  df_rankscore$measure_formatted = trimws(format(df_rankscore[[var]], nsmall=decimal, big.mark=",")) 
  df_rankscore$university_annotated = paste0(trimws(df_rankscore[["university_abbr"]]), " [", abbreviate_regions(df_rankscore[["region"]]), "]", " (", df_rankscore[["measure_formatted"]], ")")
  df_rankscore2 = df_rankscore[,c("rank", "university_annotated")]
  names(df_rankscore2) <- c("Rank", var_out)
  return(df_rankscore2)
}

score_to_league_all = function(DF, Var, Var_out, Top_N = 9999, IDs = c("region","country", "university_abbr"),  Decimal = 0) {
  rank_by_score(df = DF, var = Var, ids = IDs) |> 
    choose_top20_schools(var = Var, top_n = Top_N, ids = IDs) |>
    annotate_uni_by_score_all(df_score = DF, var = Var, var_out = Var_out, ids = IDs, decimal = Decimal)
}

combine2stack_regional_ranks_all = function(ls_ranks) { 
  Reduce(
    f = function(x,y) merge(x,y, all=TRUE, by=c("Rank")), x = ls_ranks) |>
    dplyr::mutate(
      Citations = case_when(is.na(Citations)~"",TRUE~Citations),
      Impact = case_when(is.na(Impact)~"",TRUE~Impact),
      `Top Publications` = case_when(is.na(`Top Publications`)~"",TRUE~`Top Publications`),
      `Recent Citations` = case_when(is.na(`Recent Citations`)~"",TRUE~`Recent Citations`),
      `Recent Impact` = case_when(is.na(`Recent Impact`)~"",TRUE~`Recent Impact`),
      `Recent Top Publications` = case_when(is.na(`Recent Top Publications`)~"",TRUE~`Recent Top Publications`)
    ) 
}

make_regional_tables_all = function(tab_df, out_format="latex") { 
  kableExtra::kbl(
    tab_df, format = out_format, booktabs = TRUE, linesep = "", longtable = TRUE,
    caption = "Caption to fill"
  ) |>
    kableExtra::kable_styling(
      latex_options = c("repeat_header"),
      repeat_header_continued = "\\textit{(Continued on Next Page...)}"
    )
}

# Set region list ---------------------------------------------------------

IDs_region <- data.frame( 
  "Region" =   rep(
    c("North America", "Europe", "Asia", "Oceania", "Latin America", "Africa"),
    times = c(10,10,10,10,10,2)), 
  "Rank" = c(rep(1:10,5),1:2)
)

IDs_region2 <- data.frame( 
  "Region" =   rep(
    c("North America", "Europe", "Asia", "Oceania", "Latin America", "Africa"),
    times = rep(10,6)), 
  "Rank" = c(rep(1:10,6))
)

# Split data by region ----------------------------------------------------

Ndepts_region = dat_falc |> 
  dplyr::group_by(region) |>
  dplyr::summarise(n_schools= length(unique(paste0(country,"-",university_abbr)))) |> 
  dplyr::ungroup() |>
  dplyr::arrange(desc(n_schools))
dat_dept_region <- dat_dept %>%
  split(.$region) %>% 
  {.[Ndepts_region$region]}

# Table 1: Top-10 Departments by Region -----------------------------------

## Make Table 1 columns ----

set.seed(111)
rank_citation_region = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "citations", Var_out = "Citations", Top_N = 10)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_citation_recent_region = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "citations_recent", Var_out = "Recent Citations", Top_N = 10)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_impact_region = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "impact", Var_out = "Impact", Top_N = 10)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_impact_recent_region = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "impact_recent", Var_out = "Recent Impact", Top_N = 10)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_top5_region = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top==0)) |>
  purrr::map(~score_to_league(DF = .x, Var = "pub_top", Var_out = "Top Publications", Top_N = 10)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_top5_recent_region = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top_recent==0)) %>% 
  {.[!grepl("Africa", names(.))]} |>
  purrr::map(~score_to_league(DF = .x, Var = "pub_top_recent", Var_out = "Recent Top Publications", Top_N = 10)) %>% 
  dplyr::bind_rows(.id = "Region")

rank_top5_recent_region[rank_top5_recent_region$Region=='Asia',]
# Seoul National, Duke Kunshan tied with two recent top publications each; check other potential ties below
dat_dept_region$Asia |>
  dplyr::filter(pub_top_recent==2) |>
  dplyr::select(university, pub_top_recent)
# two other tied schools: Tsinghua, Osaka

# flag random ties with asteriks
rank_top5_recent_region$`Recent Top Publications`[rank_top5_recent_region$`Recent Top Publications` == "Seoul National (2)"] <- "Seoul National (2)*"
rank_top5_recent_region$`Recent Top Publications`[rank_top5_recent_region$`Recent Top Publications` == "Duke Kunshan (2)"] <- "Duke Kunshan (2)*"

## Make Table 1 full table ----

rank_TabI <- list(
  rank_citation_region, rank_impact_region, rank_top5_region,
  rank_citation_recent_region, rank_impact_recent_region, rank_top5_recent_region
) |>
  combine2stack_regional_ranks() |>
  make_regional_tables()

## Export Table 1 ----

if (export_tables) writeLines(rank_TabI, "Tables/Table1.tex")  

# Table 2: Top-10 Per Faculty by Region -----------------------------------

## Make Table 2 columns ----

set.seed(111)
rank_citation_region2 = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "citations_pf", Var_out = "Citations", Top_N = 10)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_citation_recent_region2 = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "citations_recent_pf", Var_out = "Recent Citations", Top_N = 10)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_impact_region2 = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "impact_pf", Var_out = "Impact", Top_N = 10, Decimal = 2)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_impact_recent_region2 = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "impact_recent_pf", Var_out = "Recent Impact", Top_N = 10, Decimal = 2)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_top5_region2 = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top==0)) |>
  purrr::map(~score_to_league(DF = .x, Var = "pub_top_pf", Var_out = "Top Publications", Top_N = 10, Decimal = 2)) |>
  dplyr::bind_rows(.id = "Region")

set.seed(111)
rank_top5_recent_region2 = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top_recent==0)) %>% 
  {.[!grepl("Africa", names(.))]} |>
  purrr::map(~score_to_league(DF = .x, Var = "pub_top_recent_pf", Var_out = "Recent Top Publications", Top_N = 10, Decimal = 2)) %>% 
  dplyr::bind_rows(.id = "Region")

## Make Table 2 full table ----

rank_TabII <- list(
  rank_citation_region2, rank_impact_region2, rank_top5_region2,
  rank_citation_recent_region2, rank_impact_recent_region2, rank_top5_recent_region2
) |>
  combine2stack_regional_ranks() |>
  make_regional_tables()

## Export Table 2 ----

if (export_tables) writeLines(rank_TabII, "Tables/Table2.tex")  

# Table 3: Top Scholars by Region -----------------------------------------

## Read faculty-PhD location mapping file ----

dict_phdregion <- purrr::map(
  c('Citations', 'Impact', 'Top Publications', 
    'Recent Citations', 'Recent Impact', 'Recent Top Publications'),
  ~readxl::read_xlsx('Dictionary_top_scholars.xlsx', sheet = .x)
) |>
  `names<-`(c('Citations', 'Impact', 'Top Publications', 
              'Recent Citations', 'Recent Impact', 'Recent Top Publications'))

## Define Table 3 helpers ----

rank_faculties_by_metric = function(df_score = dat_falc, df_loc, metric, top10_only = TRUE, id_region = IDs_region2) {
  metric = enquo(metric)
  df_rank = df_score |>
    dplyr::filter(!!metric > 0) |>
    dplyr::group_by(region) |>
    dplyr::mutate(
      rank = rank(-!!metric, ties.method = "random")
    ) |>
    dplyr::arrange(rank) |>
    dplyr::ungroup() |>
    dplyr::arrange(region, rank) |>
    dplyr::left_join(df_loc, by = c('region' = 'Region', 'name' = 'Name')) |>
    dplyr::mutate(name_shown = ifelse(is.na(Name2), name, Name2)) |>
    dplyr::mutate(author_score = paste0(name_shown, " (", round(!!metric,2), ") [", PhD_Region, "]")) |>
    dplyr::select(region, rank, author_score)
  if (top10_only) df_rank = df_rank |>
    dplyr::filter(rank <= 10) |>
    dplyr::select(Region = region, Rank = rank, Scientist = author_score) %>%
    dplyr::left_join(id_region, ., by=c("Region","Rank"))
  return(df_rank)
}

## Make Table 3 columns ----

set.seed(111)
rank_falc_citations_top10 = rank_faculties_by_metric(metric = citations, df_loc = dict_phdregion$Citations)

set.seed(111)
rank_falc_impact_top10 = rank_faculties_by_metric(metric = impact, df_loc = dict_phdregion$Impact)

set.seed(111)
rank_falc_top5_top10 = rank_faculties_by_metric(metric = pub_top, df_loc = dict_phdregion$`Top Publications`)

dat_falc |> # manually check Asian ties
  dplyr::filter(region=='Asia' & pub_top==4) |>
  dplyr::left_join(dict_phdregion$`Top Publications`, by = c('region' = 'Region', 'name' = 'Name')) |>
  dplyr::select(university, name, name2 = Name2, phd_origin = PhD_Region, pub_top) |>
  dplyr::arrange(name)

rank_falc_top5_top10$`Scientist`[rank_falc_top5_top10$`Scientist` == "Woo Sang Kim (4) [NA]"] <- "Woo Sang Kim (4)* [NA]"
rank_falc_top5_top10$`Scientist`[rank_falc_top5_top10$`Scientist` == "Giuliana Pardelli (4) [NA]"] <- "Giuliana Pardelli (4)* [NA]"

set.seed(111)
rank_falc_citations_recent_top10 = rank_faculties_by_metric(metric = citations_recent, df_loc = dict_phdregion$`Recent Citations`)

dat_falc |> # manually check European ties
  dplyr::filter(region=='Europe' & citations_recent==1831) |>
  dplyr::left_join(dict_phdregion$`Recent Citations`, by = c('region' = 'Region', 'name' = 'Name')) |>
  dplyr::select(university, name, name2 = Name2, phd_origin = PhD_Region, citations_recent) |>
  dplyr::arrange(name)

rank_falc_citations_recent_top10$`Scientist`[rank_falc_citations_recent_top10$`Scientist` == "Sara Hobolt (1831) [EU]"] <- "Sara Hobolt (1831)* [EU]"

set.seed(111)
rank_falc_impact_recent_top10 = rank_faculties_by_metric(metric = impact_recent, df_loc = dict_phdregion$`Recent Impact`)

set.seed(111)
rank_falc_top5_recent_top10 = rank_faculties_by_metric(metric = pub_top_recent, df_loc = dict_phdregion$`Recent Top Publications`)

dat_falc |> # manually check North American ties
  dplyr::filter(region=='North America' & pub_top_recent==8) |>
  dplyr::left_join(dict_phdregion$`Recent Top Publications`, by = c('region' = 'Region', 'name' = 'Name')) |>
  dplyr::select(university, name, name2 = Name2, phd_origin = PhD_Region, pub_top_recent) |>
  dplyr::arrange(name)

rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Edmund Malesky (8) [NA]'] <- 'Edmund Malesky (8)* [NA]'
rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Nikhar Gaikwad (8) [NA]'] <- 'Nikhar Gaikwad (8)* [NA]'

dat_falc |> # manually check European ties
  dplyr::filter(region=='Europe' & pub_top_recent==5) |>
  dplyr::left_join(dict_phdregion$`Recent Top Publications`, by = c('region' = 'Region', 'name' = 'Name')) |>
  dplyr::select(university, name, name2 = Name2, phd_origin = PhD_Region, pub_top_recent) |>
  dplyr::arrange(name)

rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Martin Vinæs Larsen (5) [EU]'] <- 'Martin Vinæs Larsen (5)* [EU]'
rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Gabor Simonovits (5) [NA]'] <- 'Gabor Simonovits (5)* [NA]'

dat_falc |> # manually check Oceanic ties
  dplyr::filter(region=='Oceania' & pub_top_recent==1) |>
  dplyr::left_join(dict_phdregion$`Recent Top Publications`, by = c('region' = 'Region', 'name' = 'Name')) |>
  dplyr::select(university, name, name2 = Name2, phd_origin = PhD_Region, pub_top_recent) |>
  dplyr::arrange(name)

rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Jill Sheppard (1) [OC]'] <- 'Jill Sheppard (1)* [OC]'
rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Jana von Stein (1) [NA]'] <- 'Jana von Stein (1)* [NA]'
rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Ian McAllister (1) [EU]'] <- 'Ian McAllister (1)* [EU]'
rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='William Bosworth (1) [EU^]'] <- 'William Bosworth (1)* [EU^]'

dat_falc |> # manually check Latin America ties
  dplyr::filter(region=='Latin America' & pub_top_recent==1) |>
  dplyr::left_join(dict_phdregion$`Recent Top Publications`, by = c('region' = 'Region', 'name' = 'Name')) |>
  dplyr::select(university, name, name2 = Name2, phd_origin = PhD_Region, pub_top_recent) |>
  dplyr::arrange(name)

rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Eric Magar Meurs (1) [NA]'] <- 'Eric Magar Meurs (1)* [NA]'
rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Valentín Figueroa (1) [NA]'] <- 'Valentín Figueroa (1)* [NA]'
rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Sebastián Vallejo Vera (1) [NA]'] <- 'Sebastián Vallejo Vera (1)* [NA]'
rank_falc_top5_recent_top10$`Scientist`[rank_falc_top5_recent_top10$`Scientist`=='Juan Pablo Micozzi (1) [NA]'] <- 'Juan Pablo Micozzi (1)* [NA]'

## Make Table 3 full table ----

rank_Tab3 =
  Reduce(
    cbind.data.frame,
    list(
      IDs_region2[,"Rank"],
      rank_falc_citations_top10[,"Scientist"], 
      rank_falc_impact_top10[,"Scientist"],
      rank_falc_top5_top10[,"Scientist"],
      rank_falc_citations_recent_top10[,"Scientist"],
      rank_falc_impact_recent_top10[,"Scientist"],
      rank_falc_top5_recent_top10[,"Scientist"]
    )
  ) |>
  `names<-`(c("Rank", "Citations", "Impact","Top Publications", "Recent Citations", "Recent Impact", "Recent Top Publications")) |>
  dplyr::mutate(
    Citations = case_when(is.na(Citations)~"",TRUE~Citations),
    Impact = case_when(is.na(Impact)~"",TRUE~Impact),
    `Top Publications` = case_when(is.na(`Top Publications`)~"",TRUE~`Top Publications`),
    `Recent Citations` = case_when(is.na(`Recent Citations`)~"",TRUE~`Recent Citations`),
    `Recent Impact` = case_when(is.na(`Recent Impact`)~"",TRUE~`Recent Impact`),
    `Recent Top Publications` = case_when(is.na(`Recent Top Publications`)~"",TRUE~`Recent Top Publications`)
  ) |>
  kableExtra::kbl(
    format = "latex", booktabs = TRUE, linesep = ""
  ) |>
  kableExtra::pack_rows(
    index = c("North America"=10,"Europe"=10,"Asia"=10,"Oceania"=10,"Latin America"=10,"Africa"=10),
    bold=TRUE, italic=TRUE, hline_before = TRUE, hline_after = TRUE)

## Export Table 3 ----

if (export_tables) writeLines(rank_Tab3, "Tables/Table3.tex")  

# Appendix Table A1: Listing Universities ----------------------------

# These data come directly from the QS website and can be found here:
# https://www.topuniversities.com/world-university-rankings/2022

# Appendix Table A2: Global Department Ranking ----------------------------

## Make Table A2 columns ----

rank_citation_region_all = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "citations", Var_out = "Citations")
rank_citation_recent_region_all = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "citations_recent", Var_out = "Recent Citations")
rank_impact_region_all = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "impact", Var_out = "Impact")
rank_impact_recent_region_all = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "impact_recent", Var_out = "Recent Impact")
rank_top5_region_all = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  dplyr::filter(!pub_top==0) |>
  score_to_league_all(Var = "pub_top", Var_out = "Top Publications")
rank_top5_recent_region_all = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top_recent==0)) %>% 
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "pub_top_recent", Var_out = "Recent Top Publications")

## Make Table A2 full table ----

rank_TabA2 <- list(
  rank_citation_region_all, rank_impact_region_all, rank_top5_region_all,
  rank_citation_recent_region_all, rank_impact_recent_region_all, rank_top5_recent_region_all
) |>
  combine2stack_regional_ranks_all() |>
  make_regional_tables_all()

## Export Table A2 ----

if (export_tables) writeLines(rank_TabA2, "Tables/TableA2.tex")  

# Table A3: Global Ranking Per Faculty ------------------------------------

## Make Table A3 columns ----

rank_citation_region_all2 = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "citations_pf", Var_out = "Citations")
rank_citation_recent_region_all2 = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "citations_recent_pf", Var_out = "Recent Citations")
rank_impact_region_all2 = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "impact_pf", Var_out = "Impact", Decimal = 2)
rank_impact_recent_region_all2 = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "impact_recent_pf", Var_out = "Recent Impact", Decimal = 2)
rank_top5_region_all2 = dat_dept_region |>
  dplyr::bind_rows(.id = "Region") |>
  dplyr::filter(!pub_top==0) |>
  score_to_league_all(Var = "pub_top_pf", Var_out = "Top Publications", Decimal = 2)
rank_top5_recent_region_all2 = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top_recent==0)) %>% 
  dplyr::bind_rows(.id = "Region") |>
  score_to_league_all(Var = "pub_top_recent_pf", Var_out = "Recent Top Publications", Decimal = 2)

## Make Table A3 full table ----

rank_TabA3 <- list(
  rank_citation_region_all2, rank_impact_region_all2, rank_top5_region_all2,
  rank_citation_recent_region_all2, rank_impact_recent_region_all2, rank_top5_recent_region_all2
) |>
  combine2stack_regional_ranks_all() |>
  make_regional_tables_all()

## Export Table A3 ----

if (export_tables) writeLines(rank_TabA3, "Tables/TableA3.tex")  

# Appendix Table A4: Top-10 by Historical Impact & Coauthors --------------

## Make Table A4 columns ----

rank_citation_historic_ca_region = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "citations_historic_ca", Var_out = "Citations")) |>
  dplyr::bind_rows(.id = "Region")
rank_citation_recent_historic_ca_region = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "citations_recent_historic_ca", Var_out = "Recent Citations")) |>
  dplyr::bind_rows(.id = "Region")
rank_impact_historic_ca_region = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "impact_historic_ca", Var_out = "Impact")) |>
  dplyr::bind_rows(.id = "Region")
rank_impact_recent_historic_ca_region = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "impact_recent_historic_ca", Var_out = "Recent Impact")) |>
  dplyr::bind_rows(.id = "Region")
rank_top5_historic_ca_region = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top_historic_ca==0)) %>% 
  {.[!grepl("Africa", names(.))]} |>
  purrr::map(~score_to_league(DF = .x, Var = "pub_top_historic_ca", Var_out = "Top Publications", Top_N = 10, Decimal = 1)) |>
  dplyr::bind_rows(.id = "Region")
rank_top5_recent_historic_ca_region = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top_recent_historic_ca==0)) %>% 
  {.[!grepl("Africa", names(.))]} |>
  purrr::map(~score_to_league(DF = .x, Var = "pub_top_recent_historic_ca", Var_out = "Recent Top Publications", Top_N = 10, Decimal = 1)) |>
  dplyr::bind_rows(.id = "Region")

## Make Table A4 full table ----

rank_TabA4 <- list(
  rank_citation_historic_ca_region, rank_impact_historic_ca_region, rank_top5_historic_ca_region,
  rank_citation_recent_historic_ca_region, rank_impact_recent_historic_ca_region, rank_top5_recent_historic_ca_region
) |>
  combine2stack_regional_ranks() |>
  make_regional_tables()

## Export Table A4 ---- 

if (export_tables) writeLines(rank_TabA4, "Tables/TableA4.tex")  

# Appendix Table A5: Top-10 Per Faculty by Historical Impact & Coauthors ----

## Make Table A5 columns ----

rank_citation_historic_ca_region2 = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "citations_historic_ca_pf", Var_out = "Citations")) |>
  dplyr::bind_rows(.id = "Region")
rank_citation_recent_historic_ca_region2 = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "citations_recent_historic_ca_pf", Var_out = "Recent Citations")) |>
  dplyr::bind_rows(.id = "Region")
rank_impact_historic_ca_region2 = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "impact_historic_ca_pf", Var_out = "Impact")) |>
  dplyr::bind_rows(.id = "Region")
rank_impact_recent_historic_ca_region2 = dat_dept_region |>
  purrr::map(~score_to_league(DF = .x, Var = "impact_recent_historic_ca_pf", Var_out = "Recent Impact")) |>
  dplyr::bind_rows(.id = "Region")
rank_top5_historic_ca_region2 = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top_historic_ca==0)) %>% 
  {.[!grepl("Africa", names(.))]} |>
  purrr::map(~score_to_league(DF = .x, Var = "pub_top_historic_ca_pf", Var_out = "Top Publications", Top_N = 10, Decimal = 2)) |>
  dplyr::bind_rows(.id = "Region")
rank_top5_recent_historic_ca_region2 = dat_dept_region |>
  purrr::map(~dplyr::filter(.x, !pub_top_recent_historic_ca==0)) %>% 
  {.[!grepl("Africa", names(.))]} |>
  purrr::map(~score_to_league(DF = .x, Var = "pub_top_recent_historic_ca_pf", Var_out = "Recent Top Publications", Top_N = 10, Decimal = 2)) |>
  dplyr::bind_rows(.id = "Region")

## Make Table A5 full table ----

rank_TabA5 <- list(
  rank_citation_historic_ca_region2, rank_impact_historic_ca_region2, rank_top5_historic_ca_region2,
  rank_citation_recent_historic_ca_region2, rank_impact_recent_historic_ca_region2, rank_top5_recent_historic_ca_region2
) |>
  combine2stack_regional_ranks() |>
  make_regional_tables()

## Export Table A5 ----

if (export_tables) writeLines(rank_TabA5, "Tables/TableA5.tex")  

# Appendix Table A18: Top Cited Publications ------------------------------

rank_pub_citations = dat_pub |>
  dplyr::group_by(region) |>
  dplyr::mutate(rank_citation = rank(-citations, ties.method = "random")) |>
  dplyr::arrange(rank_citation) |>
  dplyr::ungroup() |>
  dplyr::arrange(region, rank_citation) |>
  dplyr::mutate(
    publisher_journal = stringr::str_to_title(source)
  ) |>
  dplyr::mutate(publisher_journal = stringr::str_replace_all(publisher_journal, "Of", "of"))

rank_pub_citations_top10 = rank_pub_citations |> 
  dplyr::filter(rank_citation<=10) |>
  dplyr::select(Region = region, 
                Rank = rank_citation, 
                Publication = publication, 
                'Publisher/Journal' = publisher_journal, 
                `Total Citations` = citations) %>% 
  dplyr::left_join(IDs_region2, ., by=c("Region","Rank")) 

rank_pub_citations_top10_augmented <- rank_pub_citations_top10 |>
  tibble::add_column(
    Author = c(
      # North American authors
      'Andrew Gelman', 'Jane Bennett', 'Andrew Gelman', 'Edward G. Carmines', 'Alexander Wendt', 
      'Henry Brady', 'Gary King', 'John Mearsheimer', 'Peter A. Hall', 'Alexander Wendt',
      # European authors
      'Ole Wæver', 'Bruce Robert Hoffman', 'Jan Aart Scholte', 'Stathis N. Kalyvas', 'Ian Manners',
      'Alexandra Segerberg', 'Patrick Paul Walsh', 'Thomas Risse', 'Mark Bovens', 'Jason Reifler',
      # Asian authors
      'Yu-Sung Su', 'Alfred Tat-Kei Ho', 'Haohan Chen', 'David Levi Faur', 'T.K. Ahn',
      'Avner De-Shalit', 'Abdul Noury', 'Pierre Landry', 'Chandran Kukathas', 'Chandran Kukathas',
      # Oceanic authors
      'Anthony Zwi', 'David Schlosberg', 'John Keane', 'Brian Head', 'Michael Mintrom', 
      'John Keane', 'James Der Derian', 'Matt McDonald', 'David Schlosberg', 'Michael Mintrom',
      # Latin American authors
      'Cristóbal Rovira Kaltwasser', 'Jose Antonio Puppim De Oliveira', 'Evelina Dagnino', 'Marta Teresa da Silva Arretche', 'Fernando Luiz Abrucio',
      'David Altman', 'Fernando Luiz Abrucio', 'Alejandro Moreno Álvarez', 'Alberto Simpser', 'Marta Teresa da Silva Arretche',
      # African authors
      'Maye Kassem', 'Anthony Butler', 'Anthony Butler', 'Rabab El Mahdi', 'Rabab El Mahdi',
      'Maye Kassem', 'Walid Kazziha', 'Ibrahim Elnur', 'Nadine Sika', 'Anthony Butler'
    ),
    .after = 'Publication'
  ) 

rank_pub_citations_top10_augmented$`Publisher/Journal`[
  rank_pub_citations_top10_augmented$Publication=='The logic of connective action: Digital media and the personalization of contentious politics'] <- 'Cambridge University Press'

rank_pub_citations_top10_augmented$`Publisher/Journal`[rank_pub_citations_top10_augmented$`Publisher/Journal`%in%c('',' ')] <- c(
  # missing publishers for North America
  'Harvard University Press', 'Cambridge University Press', 'Harvard University Press',
  # missing publishers for Europe
  'Lynne Rienner Publishers', 
  # missing publishers for Latin America
  'Editora Hucitec',
  # missing publishers for Africa
  'American University in Cairo Press', 'Ithaca Press', 'New York: St. Martin’s Press', 'Routledge'
)

rank_pub_citations_top10_augmented$`Country of Location` <- c(
  # North American CoOrs
  'United Kingdom', 'United States', 'United Kingdom', 'United States', 'United Kingdom',
  'United States', 'United States', 'United States', 'United Kingdom', 'United States',
  # European CoOrs
  'United States', 'United States', 'United Kingdom', 'United Kingdom', 'United Kingdom',
  'United Kingdom', 'United States','United States','United Kingdom','United States',
  # Asian CoOrs
  'United States', 'United States', 'United States', 'United States', 'Netherlands',
  'United Kingdom', 'United Kingdom', 'United Kingdom', 'United States','United Kingdom',
  # Oceanic CoOrs
  'United Kingdom','United Kingdom','United Kingdom','United States','United States',
  'United States','United Kingdom','United Kingdom','United Kingdom','United States',
  # Latin American CoOrs
  'United Kingdom','United Kingdom','Brazil','Brazil','Brazil',
  'United States','Brazil','United Kingdom','United Kingdom','Brazil',
  # African CoOrs
  'United States', 'United Kingdom', 'United Kingdom', 'United Kingdom', 'Egypt', 
  'United Kingdom', 'United States','United Kingdom','United Kingdom','United Kingdom'
)

rank_TabA18 = kableExtra::kbl(
  rank_pub_citations_top10_augmented |> 
    dplyr::select(-Region) |>
    dplyr::select(Rank, Publication, Author, `Publisher/Journal`, `Country of Location`, `Total Citations`), 
  format = "latex", booktabs = TRUE, linesep = ""
) |>
  kableExtra::pack_rows(
    index = c("North America"=10,"Europe"=10,"Asia"=10,"Oceania"=10,"Latin America"=10,"Africa"=10),
    bold=TRUE, italic=TRUE, hline_before = TRUE, hline_after = TRUE)

# NOTE: Some very small and local typos or capitalization preferences (to upper/lower cases) in publication and/or publisher names are corrected in the exported TEX file below
if (export_tables) writeLines(rank_TabA18, "Tables/TableA18.tex")  

# END #
